1 /**
2  * SIMD-accelerated Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0)
6  */
7 module containers.simdset;
8 
9 private import std.experimental.allocator.mallocator : Mallocator;
10 
11 /**
12  * Set implementation that is well suited for small sets and simple items.
13  *
14  * Uses SSE instructions to compare multiple elements simultaneously, but has
15  * linear time complexity.
16  *
17  * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in
18  * this container unless they are also stored somewhere else.
19  *
20  * Params:
21  *     T = the element type
22  *     Allocator = the allocator to use. Defaults to `Mallocator`.
23  */
24 version (D_InlineAsm_X86_64) struct SimdSet(T, Allocator = Mallocator)
25 	if (T.sizeof == 1 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)
26 {
27 	this(this) @disable;
28 
29 	private import std.experimental.allocator.common : stateSize;
30 
31 	static if (stateSize!Allocator != 0)
32 	{
33 		/// No default construction if an allocator must be provided.
34 		this() @disable;
35 
36 		/**
37 		 * Use the given `allocator` for allocations.
38 		 */
39 		this(Allocator allocator)
40 		in
41 		{
42 			assert(allocator !is null, "Allocator must not be null");
43 		}
44 		do
45 		{
46 			this.allocator = allocator;
47 		}
48 	}
49 
50 	~this()
51 	{
52 		scope (failure) assert(false, "SimdSet destructor threw an exception");
53 		clear();
54 	}
55 
56 	void clear()
57 	{
58 		allocator.deallocate(cast(void[]) storage);
59 		_length = 0;
60 		storage = [];
61 	}
62 
63 	/**
64 	 * Params:
65 	 *     item = the item to check
66 	 * Returns:
67 	 *     true if the set contains the given item
68 	 */
69 	bool contains(T item) const pure nothrow @nogc @trusted
70 	{
71 		if (_length == 0)
72 			return false;
73 		bool retVal;
74 		immutable remainder = _length % (16 / T.sizeof);
75 		ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1;
76 		//ushort resultMask;
77 		ulong ptrStart = cast(ulong) storage.ptr;
78 		ulong ptrEnd = ptrStart + storage.length * T.sizeof;
79 		static if (T.sizeof == 1)
80 			ulong needle = (cast(ubyte) item) * 0x01010101_01010101;
81 		else static if (T.sizeof == 2)
82 			ulong needle = (cast(ushort) item) * 0x00010001_00010001;
83 		else static if (T.sizeof == 4)
84 			ulong needle = (cast(ulong) item) * 0x00000001_00000001;
85 		else static if (T.sizeof == 8)
86 			ulong needle = cast(ulong) item;
87 		else
88 			static assert(false);
89 		mixin(asmSearch());
90 	end:
91 		return retVal;
92 	}
93 
94 	/// ditto
95 	bool opBinaryRight(string op)(T item) const pure nothrow @nogc @safe if (op == "in")
96 	{
97 		return contains(item);
98 	}
99 
100 	/**
101 	 * Inserts the given item into the set.
102 	 *
103 	 * Params:
104 	 *     item = the item to insert
105 	 * Returns:
106 	 *     true if the item was inserted or false if it was already present
107 	 */
108 	bool insert(T item)
109 	{
110 		if (contains(item))
111 			return false;
112 		if (storage.length > _length)
113 			storage[_length] = item;
114 		else
115 		{
116 			immutable size_t cl = (storage.length * T.sizeof);
117 			immutable size_t nl = cl + 16;
118 			void[] a = cast(void[]) storage;
119 			allocator.reallocate(a, nl);
120 			storage = cast(typeof(storage)) a;
121 			storage[_length] = item;
122 		}
123 		_length++;
124 		return true;
125 	}
126 
127 	/// ditto
128 	bool opOpAssign(string op)(T item) if (op == "~")
129 	{
130 		return insert(item);
131 	}
132 
133 	/// ditto
134 	alias insertAnywhere = insert;
135 
136 	/// ditto
137 	alias put = insert;
138 
139 	/**
140 	 * Removes the given item from the set.
141 	 *
142 	 * Params:
143 	 *     item = the time to remove
144 	 * Returns:
145 	 *     true if the item was removed, false if it was not present
146 	 */
147 	bool remove(T item)
148 	{
149 		import std.algorithm : countUntil;
150 
151 		// TODO: Make this more efficient
152 
153 		ptrdiff_t begin = countUntil(storage, item);
154 		if (begin == -1)
155 			return false;
156 		foreach (i; begin .. _length - 1)
157 			storage[i] = storage[i + 1];
158 		_length--;
159 		return true;
160 	}
161 
162 	/**
163 	 * Slice operator
164 	 */
165 	auto opSlice(this This)()
166 	{
167 		import containers.internal.element_type : ContainerElementType;
168 		return cast(ContainerElementType!(This, T)[]) storage[0 .. _length];
169 	}
170 
171 	/**
172 	 * Returns:
173 	 *     the number of items in the set
174 	 */
175 	size_t length() const pure nothrow @nogc @property
176 	{
177 		return _length;
178 	}
179 
180 	invariant
181 	{
182 		assert((storage.length * T.sizeof) % 16 == 0, "Bad storage length");
183 	}
184 
185 private:
186 
187 	import containers.internal.storage_type : ContainerStorageType;
188 	private import containers.internal.mixins : AllocatorState;
189 
190 	static string asmSearch()
191 	{
192 		import std.string : format;
193 
194 		static if (T.sizeof == 1)
195 			enum instruction = `pcmpeqb`;
196 		else static if (T.sizeof == 2)
197 			enum instruction = `pcmpeqw`;
198 		else static if (T.sizeof == 4)
199 			enum instruction = `pcmpeqd`;
200 		else static if (T.sizeof == 8)
201 			enum instruction = `pcmpeqq`;
202 		else
203 			static assert(false);
204 
205 		static if (__VERSION__ >= 2067)
206 			string s = `asm pure nothrow @nogc`;
207 		else
208 			string s = `asm`;
209 
210 		return s ~ `
211 		{
212 			mov R8, ptrStart;
213 			mov R9, ptrEnd;
214 			sub R8, 16;
215 			sub R9, 16;
216 			movq XMM0, needle;
217 			shufpd XMM0, XMM0, 0;
218 		loop:
219 			add R8, 16;
220 			movdqu XMM1, [R8];
221 			%s XMM1, XMM0;
222 			pmovmskb RAX, XMM1;
223 			//mov resultMask, AX;
224 			mov BX, AX;
225 			and BX, mask;
226 			cmp R8, R9;
227 			cmove AX, BX;
228 			popcnt AX, AX;
229 			test AX, AX;
230 			jnz found;
231 			cmp R8, R9;
232 			jl loop;
233 			mov retVal, 0;
234 			jmp end;
235 		found:
236 			mov retVal, 1;
237 			jmp end;
238 		}`.format(instruction);
239 	}
240 
241 	mixin AllocatorState!Allocator;
242 	ContainerStorageType!(T)[] storage;
243 	size_t _length;
244 }
245 
246 ///
247 version (D_InlineAsm_X86_64) version(emsi_containers_unittest) unittest
248 {
249 	import std.string : format;
250 
251 	void testSimdSet(T)()
252 	{
253 		SimdSet!T set;
254 		assert(set.insert(1));
255 		assert(set.length == 1);
256 		assert(set.contains(1));
257 		assert(!set.insert(1));
258 		set.insert(0);
259 		set.insert(20);
260 		assert(set.contains(1));
261 		assert(set.contains(0));
262 		assert(!set.contains(10));
263 		assert(!set.contains(50));
264 		assert(set.contains(20));
265 		foreach (T i; 28 .. 127)
266 			set.insert(i);
267 		foreach (T i; 28 .. 127)
268 			assert(set.contains(i), "%d".format(i));
269 		foreach (T i; 28 .. 127)
270 			assert(set.remove(i));
271 		assert(set.length == 3, "%d".format(set.length));
272 		assert(set.contains(0));
273 		assert(set.contains(1));
274 		assert(set.contains(20));
275 		assert(!set.contains(28));
276 	}
277 
278 	testSimdSet!ubyte();
279 	testSimdSet!ushort();
280 	testSimdSet!uint();
281 	testSimdSet!ulong();
282 	testSimdSet!byte();
283 	testSimdSet!short();
284 	testSimdSet!int();
285 	testSimdSet!long();
286 }
287 
288 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1
289 	|| T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8))
290 {
291 	import std.string : format;
292 	static assert (false, ("Cannot instantiate SimdSet of type %s because its size "
293 		~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof));
294 }